from msilib.schema import Icon
from turtle import color
import folium
import pandas as pd
import numpy as np
#specific functions to call
# Marker: interactive point
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
# icon options: beautify_icon
# from folium.plugins.beautify_icon import BeautifyIcon
# clustering (needs quite a lot of setup)
from folium import plugins
#from sklearn.cluster import KMeans
#from sklearn.decomposition import PCA
import matplotlib.cm as cm
import matplotlib.colors as colors
#shp file
import shapefile
from shapely.geometry import Point, Polygon, LineString
def setup(input_name):
#set up: read in data for df for single family dataframe
# make df available through the file
global df
# for csv file
if input_name[-1] == "v":
df = pd.read_csv(input_name)
df_name=input_name[0:-4]
# for xlsx file
else:
# for EXCEL | sheet_name可有可无
# df = pd.read_excel(input_name,sheet_name="wantedSheetName")
df = pd.read_excel(input_name)
df_name=input_name[0:-5]
# show what options we have
df.head()
# basci info for ATL mapping
# map = folium.Map(location = [33.779191,-84.369],zoom_start =10,prefer_canvas=True)
return df_name
def setupList(input_name):
    """Read one input file and return it as a new DataFrame.

    Unlike ``setup``, this does not touch the module-level ``df``; the loaded
    table is only returned to the caller.

    Args:
        input_name: Path of the ``.csv`` or Excel file to read. The extension
            check is case-insensitive; anything that is not ``.csv`` is
            handed to ``pandas.read_excel``.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    import os  # function-scope import keeps this block self-contained

    extension = os.path.splitext(input_name)[1].lower()
    if extension == ".csv":
        return pd.read_csv(input_name)
    # Excel input; pass sheet_name=... to read_excel if a specific sheet is needed.
    return pd.read_excel(input_name)
def pop_up_map(df_name):
    """Save an Atlanta-centred map with a price popup marker per row of ``df``.

    Only the first 1000 rows of the module-level ``df`` are plotted; the
    map only works well with roughly that many markers.

    Args:
        df_name: Base name used for the output file
            ``popUpMap_<df_name>.html``.
    """
    price_map = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)

    # Cap the number of markers at 1000.
    for _, listing in df.head(1000).iterrows():
        position = [listing['Latitude'], listing['Longitude']]
        Marker(position, popup=listing['Price']).add_to(price_map)

    output_file = "popUpMap_" + df_name + ".html"
    price_map.save(output_file)
    return
def heat_map(df_name):
    """Save a heat map showing how densely the rows of ``df`` cover the area.

    Reads the ``Latitude`` and ``Longitude`` columns of the module-level
    ``df``. See https://zhuanlan.zhihu.com/p/392687123 for background.

    Args:
        df_name: Base name used for the output file
            ``heatMap_<df_name>.html``.
    """
    density_map = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)

    coordinates = df[['Latitude', 'Longitude']]
    heat_layer = HeatMap(data=coordinates)
    heat_layer.add_to(density_map)

    output_file = "heatMap_" + df_name + ".html"
    density_map.save(output_file)
    return
def clustering_map(df_name, num_cluster=None):
    """Save a map with one circle per row of ``df``, coloured by ``Cluster_ID``.

    Reads ``Latitude``, ``Longitude``, ``Cluster_ID`` and ``Street`` from the
    module-level ``df``. Reference: https://zhuanlan.zhihu.com/p/350647526

    Open ideas from the original notes: add a legend / description layer and
    a per-cluster summary (e.g. mean price, why rows were grouped).

    Args:
        df_name: Base name used for the output file
            ``clusteringMap_<df_name>.html``.
        num_cluster: Number of colours to generate for the clusters. When
            omitted it is derived from ``df['Cluster_ID']`` so every ID gets
            a valid colour index.
    """
    cluster_map = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)

    if num_cluster is None:
        cluster_ids = df['Cluster_ID']
        # Colours are indexed with ``cluster - 1``. Taking the larger of the
        # highest ID and the number of distinct IDs keeps the lookup in range
        # for 1-based IDs and avoids colour collisions for 0-based ones.
        # NOTE(review): assumes Cluster_ID holds non-negative integers - confirm.
        if len(cluster_ids):
            num_cluster = max(int(cluster_ids.max()), cluster_ids.nunique())
        else:
            num_cluster = 0

    # One evenly spaced rainbow colour per cluster, as hex strings.
    palette = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, num_cluster))]

    for lat, lng, cluster, street in zip(df['Latitude'], df['Longitude'], df['Cluster_ID'], df['Street']):
        shade = palette[cluster - 1]
        folium.vector_layers.CircleMarker(
            [lat, lng],
            radius=3,  # circle size
            tooltip=str(street) + ',Cluster ' + str(cluster),  # more fields can be appended here
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.9).add_to(cluster_map)

    name = "clusteringMap_" + df_name + ".html"
    cluster_map.save(name)
    return
def test_cluster(df_name):
    """Experimental map: circles grouped by a ``MarkerCluster`` layer.

    The grouping shown on the map is purely geographic (handled by
    ``MarkerCluster``); each circle is still coloured by its ``Cluster_ID``
    using a fixed palette of five rainbow colours.

    Args:
        df_name: Base name used for the output file
            ``clusteringMapTesting_<df_name>.html``.
    """
    density_map = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)

    # Marker-cluster layer that will hold every circle.
    grouping = MarkerCluster()
    grouping.add_to(density_map)

    # Five evenly spaced rainbow colours, as hex strings.
    palette = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, 5))]

    wanted_columns = ('Latitude', 'Longitude', 'Cluster_ID', 'Street')
    for lat, lng, cluster, street in zip(*(df[column] for column in wanted_columns)):
        shade = palette[cluster - 1]
        circle = folium.vector_layers.CircleMarker(
            [lat, lng],
            radius=3,  # circle size
            tooltip=str(street) + ',Cluster ' + str(cluster),  # more fields can be appended here
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.9)
        # Register the point with the cluster layer rather than the map itself.
        circle.add_to(grouping)

    # Attach the cluster layer to the map (it was also attached on creation).
    density_map.add_child(grouping)

    output_file = "clusteringMapTesting_" + df_name + ".html"
    density_map.save(output_file)
    return
def checkR(df_name, num_group):
    """Save a map that marks every location by the percentile of its residual.

    Reads ``Latitude``, ``Longitude``, ``Error``, ``ab_percentile``,
    ``error_flag`` and ``Zip`` from the module-level ``df``. Rows whose
    ``error_flag`` equals 1 get a regular marker with an "Over 95%!" popup;
    every other row gets a circle coloured by its 20-point percentile bucket.

    Args:
        df_name: Base name used for the output file
            ``NEWSEEResiCheck_<df_name>.html``.
        num_group: Currently unused; the bucket count is fixed by the
            five-colour palette below. Kept so existing callers still work.
    """
    residual_map = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)

    # One colour per 20-point percentile bucket, light to dark.
    palette = ["#DAF7A6", "#FFC300", "#FF5733", "#900C3F", "#581845"]
    last_bucket = len(palette) - 1

    for lat, lng, error_value, p, error_flag, zc in zip(
            df['Latitude'], df['Longitude'], df['Error'],
            df['ab_percentile'], df['error_flag'], df['Zip']):
        # ab_percentile is stored as a fraction; show it as a percentage.
        per = float(p) * 100
        if error_flag == 1:
            # Idea from the original notes: compare the count of flagged rows
            # against the housing density of the zip code.
            Marker(
                [lat, lng],
                popup="Over 95%!" + "\n Error Value:" + str(round(error_value, 3))
                + "\n Percentile:" + str(round(per, 3)) + "%," + str(zc)).add_to(residual_map)
        else:
            # Clamp so a percentile of exactly 100 falls in the last bucket
            # instead of indexing past the end of the palette.
            group = min(int(per // 20), last_bucket)
            folium.vector_layers.CircleMarker(
                [lat, lng],
                radius=4,  # circle size
                tooltip="Error Value:" + str(round(error_value, 3))
                + " Percentile:" + str(round(per, 3)) + "%," + str(zc),
                fill_color=palette[group],
                color=palette[group],
                fill=True,
                fill_opacity=0.9).add_to(residual_map)

    name = "NEWSEEResiCheck_" + df_name + ".html"
    residual_map.save(name)
    return
def insert_percentileGroup(col):
    """Add rank and percentile columns for ``col`` to the module-level ``df``.

    ``ab_RankNumber`` is the competition ("min") rank of each value with the
    largest value ranked first, so ties share a rank. ``ab_percentile`` is
    ``1 - rank / row_count``. Calling the function again overwrites both
    columns instead of adding duplicates.

    Args:
        col: Name of the column in ``df`` to rank.
    """
    # Descending competition ranking: largest value gets rank 1, ties share it.
    ranks = df[col].rank(method='min', ascending=False)
    df['ab_RankNumber'] = ranks

    row_count = len(df[col])
    # Vectorised form of the per-row ``1 - (rank / row_count)``.
    df['ab_percentile'] = 1 - (ranks / row_count)
    return
def multiLayer(input_files=None):
    """Build one map with a toggleable layer per clustered input file.

    Each file is loaded with ``setupList`` and drawn into its own
    ``FeatureGroup``; a ``LayerControl`` lets the viewer switch layers.
    Every file must provide ``Latitude``, ``Longitude``, ``Cluster_ID`` and
    ``Mean_Price`` columns.

    Args:
        input_files: Optional list of file names to plot. Defaults to the
            k=10 ... k=5 clustered Atlanta single-family rental files.

    Returns:
        The ``folium.Map`` holding all layers.
    """
    if input_files is None:
        input_files = [
            "SingleFamilyRentalsAtlantaArea_07072022_clustered_k=10.csv",
            "SingleFamilyRentalsAtlantaArea_07072022_clustered_k=9.csv",
            "SingleFamilyRentalsAtlantaArea_07072022_clustered_k=8.csv",
            "SingleFamilyRentalsAtlantaArea_07072022_clustered_k=7.csv",
            "SingleFamilyRentalsAtlantaArea_07072022_clustered_k=6.csv",
            "SingleFamilyRentalsAtlantaArea_07072022_clustered_k=5.csv",
        ]

    multiMap = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)

    # One shared palette sized for the largest cluster count (10), so a given
    # cluster ID has the same colour in every layer.
    max_clusters = 10
    palette = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, max_clusters))]

    for file in input_files:
        layer_df = setupList(file)
        # Layer label is the last "_"-separated part of the file name without
        # its extension, e.g. "k=10". A fixed-width slice would cut "k=10"
        # down to "=10".
        layer_name = file.rsplit(".", 1)[0].rsplit("_", 1)[-1]
        pointsgroup = folium.FeatureGroup(name=layer_name, control=True)

        for lat, lng, cluster, mPrice in zip(
                layer_df['Latitude'], layer_df['Longitude'],
                layer_df['Cluster_ID'], layer_df['Mean_Price']):
            shade = palette[cluster - 1]
            folium.vector_layers.CircleMarker(
                [lat, lng],
                radius=3,  # circle size
                tooltip='cluster' + str(cluster) + ',cluster mean price:' + str(mPrice),
                color=shade,
                fill=True,
                fill_color=shade,
                fill_opacity=0.9).add_to(pointsgroup)

        # Attach the finished layer to the shared map.
        multiMap.add_child(pointsgroup)

    folium.LayerControl().add_to(multiMap)
    return multiMap
def censusPolyShape():
    """Placeholder for a polygon layer built from census social-science data.

    Not implemented yet: the function only creates a base map and returns
    ``None``. Planned design (from the original notes): either take a
    map/group as input and return it with the layer added, or work on a
    module-level map.
    """
    # TODO: build the census polygon layer and return it.
    base_map = folium.Map(location=[33.779191, -84.369], zoom_start=10, prefer_canvas=True)
    return None
def main():
    """Run the residual-check workflow on the flagged error-analysis file.

    Loads the input into the module-level ``df``, adds percentile columns for
    ``abs_error`` and writes the residual-check map.

    Other outputs that can be produced after ``setup`` by calling the
    matching function: ``pop_up_map``, ``heat_map``, ``clustering_map``,
    ``multiLayer`` and ``test_cluster``.
    """
    # df is a single module-wide variable; avoid rebinding it elsewhere.
    source_file = "0725_rf_error_analysis_with_flag.csv"
    base_name = setup(source_file)  # only reads .csv or .xlsx

    # Residual analysis: rank the chosen column and add its percentile.
    insert_percentileGroup("abs_error")

    checkR(base_name, 5)
# Script entry: main() is currently disabled; only the multi-layer map is built.
# main()
# NOTE(review): the name `map` shadows the Python builtin of the same name.
map= multiLayer()
# Bare expression: presumably meant to display the map in a notebook cell;
# it has no effect when the file is run as a plain script - confirm usage.
map